pg_replication:
  query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::INT as lag, CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 END as is_replica"
  metrics:
    - lag:
        usage: "GAUGE"
        description: "Replication lag behind master in seconds"
    - is_replica:
        usage: "GAUGE"
        description: "Indicates if this host is a slave"

pg_postmaster:
  query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
  metrics:
    - start_time_seconds:
        usage: "GAUGE"
        description: "Time at which postmaster started"

<% if node['monitoring']['postgres_exporter']['per_table_stats'] -%>
pg_stat_user_tables:
  query: |
    SELECT
      current_database() datname,
      schemaname,
      relname,
      seq_scan,
      seq_tup_read,
      idx_scan,
      idx_tup_fetch,
      n_tup_ins,
      n_tup_upd,
      n_tup_del,
      n_tup_hot_upd,
      n_live_tup,
      n_dead_tup,
      GREATEST(last_autovacuum, last_vacuum, '1970-01-01Z') as last_vacuum,
      GREATEST(last_autoanalyze, last_analyze, '1970-01-01Z') as last_analyze,
      (vacuum_count + autovacuum_count) as vacuum_count,
      (analyze_count + autoanalyze_count) as analyze_count
    FROM
      pg_stat_user_tables
  metrics:
    - datname:
        usage: "LABEL"
        description: "Name of current database"
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    - seq_scan:
        usage: "COUNTER"
        description: "Number of sequential scans initiated on this table"
    - seq_tup_read:
        usage: "COUNTER"
        description: "Number of live rows fetched by sequential scans"
    - idx_scan:
        usage: "COUNTER"
        description: "Number of index scans initiated on this table"
    - idx_tup_fetch:
        usage: "COUNTER"
        description: "Number of live rows fetched by index scans"
    - n_tup_ins:
        usage: "COUNTER"
        description: "Number of rows inserted"
    - n_tup_upd:
        usage: "COUNTER"
        description: "Number of rows updated"
    - n_tup_del:
        usage: "COUNTER"
        description: "Number of rows deleted"
    - n_tup_hot_upd:
        usage: "COUNTER"
        description: "Number of rows HOT updated (i.e., with no separate index update required)"
    - n_live_tup:
        usage: "GAUGE"
        description: "Estimated number of live rows"
    - n_dead_tup:
        usage: "GAUGE"
        description: "Estimated number of dead rows"
    - last_vacuum:
        usage: "GAUGE"
        description: "Last time at which this table was vacuumed (not counting VACUUM FULL)"
    - last_analyze:
        usage: "GAUGE"
        description: "Last time at which this table was analyzed"
    - vacuum_count:
        usage: "COUNTER"
        description: "Number of times this table has been vacuumed"
    - analyze_count:
        usage: "COUNTER"
        description: "Number of times this table has been analyzed"

<% end -%>
pg_total_relation_size:
  query: |
    SELECT relnamespace::regnamespace as schemaname,
           relname as relname,
           pg_total_relation_size(oid) bytes
      FROM pg_class
     WHERE relkind = 'r';
  metrics:
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    - bytes:
        usage: "GAUGE"
        description: "total disk space usage for the specified table and associated indexes"

pg_blocked:
  query: |
    SELECT
      count(blocked.transactionid) AS queries,
      '__transaction__' AS table
    FROM pg_catalog.pg_locks blocked
    WHERE NOT blocked.granted AND locktype = 'transactionid'
    GROUP BY locktype
    UNION
    SELECT
      count(blocked.relation) AS queries,
      blocked.relation::regclass::text AS table
    FROM pg_catalog.pg_locks blocked
    WHERE NOT blocked.granted AND locktype != 'transactionid'
    GROUP BY relation
  metrics:
    - queries:
        usage: "GAUGE"
        description: "The current number of blocked queries"
    - table:
        usage: "LABEL"
        description: "The table on which a query is blocked"

pg_oldest_blocked:
  query: |
    SELECT coalesce(extract('epoch' from max(clock_timestamp() - state_change)), 0) age_seconds
      FROM pg_stat_activity
     WHERE wait_event_type = 'Lock'
       AND state='active'
  metrics:
    - age_seconds:
        usage: "GAUGE"
        description: "Largest number of seconds any transaction is currently waiting on a lock"

pg_slow:
  query: |
    SELECT COUNT(*) AS queries
    FROM pg_stat_activity
    WHERE state = 'active' AND (now() - query_start) > '1 seconds'::interval
  metrics:
    - queries:
        usage: "GAUGE"
        description: "Current number of slow queries"

pg_long_running_transactions:
  query: |
    SELECT COUNT(*) as transactions,
    coalesce(MAX(EXTRACT(EPOCH FROM (clock_timestamp() - xact_start))), 0) AS age_in_seconds
    FROM pg_stat_activity
    WHERE state is distinct from 'idle' AND (now() - xact_start) > '1 minutes'::interval AND query not like 'autovacuum:%'
  metrics:
    - transactions:
        usage: "GAUGE"
        description: "Current number of long running transactions"
    - age_in_seconds:
        usage: "GAUGE"
        description: "The current maximum transaction age in seconds"

pg_stuck_idle_in_transaction:
  query: |
    SELECT COUNT(*) AS queries
    FROM pg_stat_activity
    WHERE state = 'idle in transaction' AND (now() - query_start) > '10 minutes'::interval
  metrics:
    - queries:
        usage: "GAUGE"
        description: "Current number of queries that are stuck being idle in transactions"

# All xid and lsn metrics here are reported mod 2^52 to ensure they
# fit within a float for Prometheus :( Really annoying that counters
# aren't stored in a 64-bit integer. Note that for queries that report
# floats this only works because postgres_exporter does know to set
# extra_float_digits (which it sets to 2). So they don't print in
# exponential notation and precision is maintained up to 2^53-1.

pg_vacuum:
  query: |
    SELECT
      COUNT(*) AS queries,
      MAX(EXTRACT(EPOCH FROM (clock_timestamp() - query_start))) AS age_in_seconds
    FROM pg_catalog.pg_stat_activity
    WHERE state = 'active' AND trim(query) ~* '\AVACUUM (?!ANALYZE)'
  metrics:
    - queries:
        usage: "GAUGE"
        description: "The current number of VACUUM queries"
    - age_in_seconds:
        usage: "GAUGE"
        description: "The current maximum VACUUM query age in seconds"

pg_vacuum_analyze:
  query: |
    SELECT
      COUNT(*) AS queries,
      MAX(EXTRACT(EPOCH FROM (clock_timestamp() - query_start))) AS age_in_seconds
    FROM pg_catalog.pg_stat_activity
    WHERE state = 'active' AND trim(query) ~* '\AVACUUM ANALYZE'
  metrics:
    - queries:
        usage: "GAUGE"
        description: "The current number of VACUUM ANALYZE queries"
    - age_in_seconds:
        usage: "GAUGE"
        description: "The current maximum VACUUM ANALYZE query age in seconds"

pg_stuck_idle_in_transaction:
  query: |
    SELECT COUNT(*) AS queries
    FROM pg_stat_activity
    WHERE state = 'idle in transaction' AND (now() - query_start) > '10 minutes'::interval
  metrics:
    - queries:
        usage: "GAUGE"
        description: "Current number of queries that are stuck being idle in transactions"

pg_vacuum_queue:
  #master: true 
  # Until postgres_exporter is upgraded. See 
  # https://gitlab.com/gitlab-org/omnibus-gitlab/issues/4887
  query: |
    with table_opts_vs_statistic as (
      select
        pg_class.oid,
        pg_class.relname,
        coalesce(nspname, 'public') as schemaname,
        pg_class.relpages,
        pg_class.reltuples,
        case
          when array_to_string(reloptions, '') like '%autovacuum_vacuum_threshold%' then regexp_replace(array_to_string(reloptions, ''), '.*autovacuum_vacuum_threshold=([0-9.]+).*', E'\\1')::int8
          else current_setting('autovacuum_vacuum_threshold')::int8
        end as autovacuum_vacuum_threshold,
        case
          when array_to_string(reloptions, '') like '%autovacuum_vacuum_scale_factor%' then regexp_replace(array_to_string(reloptions, ''), '.*autovacuum_vacuum_scale_factor=([0-9.]+).*', E'\\1')::numeric
          else current_setting('autovacuum_vacuum_scale_factor')::numeric
        end as autovacuum_vacuum_scale_factor,
        case when array_to_string(reloptions, '') ~ 'autovacuum_enabled=(false|off)' then false else true end as autovacuum_enabled,
        n_dead_tup,
        last_autovacuum,
        last_vacuum
      from pg_class
      join pg_namespace ns on relnamespace = ns.oid 
      join pg_stat_all_tables psat  on psat.relid  = pg_class.oid
      where relkind in ('r','m') 
    ), p as (
      select pgspv.*,a.query,a.wait_event_type,a.wait_event,a.query_start
      from pg_stat_progress_vacuum pgspv
      left join pg_stat_activity a using (pid)
    ) 
    select
      table_opts_vs_statistic.schemaname as schemaname,
      table_opts_vs_statistic.relname as relname,
      round((100 * table_opts_vs_statistic.n_dead_tup::numeric / nullif(table_opts_vs_statistic.reltuples, 0))::numeric, 2) as dead_tup_pct,
      table_opts_vs_statistic.reltuples::numeric as reltuples,
      table_opts_vs_statistic.n_dead_tup,
      (relpages::bigint*8*1024) AS table_size_bytes,
      'V. Threshold:' || table_opts_vs_statistic.autovacuum_vacuum_threshold
        || ', V. Scale Factor: ' || (table_opts_vs_statistic.autovacuum_vacuum_scale_factor)::numeric *100 ||' %'
        || case when not autovacuum_enabled then ', DISABLED' else ', enabled' end as "effective_settings",
      case
        when last_autovacuum > coalesce(last_vacuum, '0001-01-01') then last_autovacuum::timestamp(0)
        when last_vacuum is not null then last_vacuum::timestamp(0)
        else null
      end as "last_vacuumed",
      case
        when last_autovacuum > coalesce(last_vacuum, '0001-01-01') then 'auto'
        when last_vacuum is not null then 'manual'
        else null
      end as "type",
      coalesce(p.phase, 'in queue') as status,
      p.pid as pid,
      coalesce  (p.query,'')as  action,
      case when p.pid is null then null else coalesce(wait_event_type ||'.'|| wait_event, 'f') end as waiting,
      round(100.0 * p.heap_blks_scanned / nullif(p.heap_blks_total, 0), 1) AS scanned_pct,
      round(100.0 * p.heap_blks_vacuumed / nullif(p.heap_blks_total, 0), 1) AS vacuumed_pct,
      extract ('epoch' from  now()-query_start) elapsed_time
    from 
    table_opts_vs_statistic
    full outer join p on p.relid = table_opts_vs_statistic.oid and p.datname = current_database()
    where
      table_opts_vs_statistic.relpages >= 8
      and  autovacuum_vacuum_threshold + (autovacuum_vacuum_scale_factor::numeric * table_opts_vs_statistic.reltuples) < table_opts_vs_statistic.n_dead_tup
  metrics:
    - schemaname:
        usage: "LABEL"
        description: "Table Schema"
    - relname:
        usage: "LABEL"
        description: "Table name"
    - dead_tup_pct:
        usage: "GAUGE"
        description: "Estimated dead tuples percent"
    - reltuples:
        usage: "GAUGE"
        description: "Number of tuples in table"
    - n_dead_tup:
        usage: "GAUGE"
        description: "Estimated number of dead tuples"
    - table_size_bytes:
        usage: "GAUGE"
        description: "Estimated table size"
    - effective_settings:
        usage: "LABEL"
        description: "Autovacuums settings"
    - last_vacuumed:
        usage: "GAUGE"
        description: "Last time at which this table was vacuumed"
    - type:
        usage: "LABEL"
        description: "Last vacuum type"
    - status:
        usage: "LABEL"
        description: "Vacuum actual status"
    - pid:
        usage: "GAUGE"
        description: "Vacuum process id"
    - action:
        usage: "LABEL"
        description: "Type of vacuum executed"
    - waiting:
        usage: "LABEL"
        description: "Vacuum queue status"
    - scanned_pct:
        usage: "GAUGE"
        description: "Estimated rows scanned percent"
    - vacuumed_pct:
        usage: "GAUGE"
        description: "Estimated vacuumed rows percent"
    - elapsed_time:
        usage: "GAUGE"
        description: "Elapsed time vacuuming (in seconds)"

#
# This query extracts marginalia comments from pg_stat_activity and provides a sampled summary of the type of
# endpoints that are actively making calls
#
pg_stat_activity_marginalia_sampler:
  query: >
    SELECT
      usename AS usename,
      a.matches[1] AS application,
      a.matches[2] AS endpoint,
      a.matches[3] AS command,
      a.wait_event AS wait_event,
      a.state AS state,
      a.wait_event_type AS wait_event_type,
      COUNT(*) active_count,
     MAX(coalesce(age_in_seconds, 0)) AS max_tx_age_in_seconds
    FROM (
      SELECT
        usename,
        regexp_matches(query, '^\s*(?:\/\*(?:application:(\w+),?)?(?:correlation_id:\w+,?)?(?:jid:\w+,?)?(?:endpoint_id:([\w/\-\.:\#\s]+),?)?.*?\*\/)?\s*(\w+)') AS matches,
        state,
        wait_event,
        wait_event_type,
        EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) AS age_in_seconds
      FROM
        pg_stat_activity
      ) a
    GROUP BY usename, application, endpoint, command, state, wait_event, wait_event_type
    ORDER BY active_count DESC
  metrics:
    - usename:
        usage: LABEL
        description: The user running the command
    - application:
        usage: LABEL
        description: Name of the application as presented in marginalia comment
    - endpoint:
        usage: LABEL
        description: Name of the web endpoint or sidekiq job as presented in marginalia comment
    - command:
        usage: LABEL
        description: The first word for the running command
    - wait_event:
        usage: LABEL
        description: Wait event  of the activity as presented by pg_stat_activity.wait_event
    - state:
        usage: LABEL
        description: State of the activity as presented by pg_stat_activity.state
    - wait_event_type:
        usage: LABEL
        description: Wait event type of the activity as presented by pg_stat_activity.wait_event_type
    - active_count:
        usage: GAUGE
        description: Number of active queries at time of sample
    - max_tx_age_in_seconds:
        usage: GAUGE
        description: Number of active queries at time of sample

# This records long running autovacuum processes. The reason we limit to
# long-running processes is to avoid cardinality problems in prometheus. Since
# we're only concerned about long running processes we limit the output only to
# those that take longer than 5 minutes.
pg_stat_activity_autovacuum:
  query: >
    SELECT
      SPLIT_PART(query, '.', 2) AS relname,
      EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) AS age_in_seconds
    FROM
      pg_stat_activity
    WHERE
      query like 'autovacuum:%'
    AND
      EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) > 300
  metrics:
    - relname:
        usage: LABEL
        description: The table being vacuumed
    - age_in_seconds:
        usage: GAUGE
        description: The age of the vacuum process in seconds

# Keep track of the number of the total number of autovacuum workers
# currently active
pg_stat_activity_autovacuum_active:
  query: >
    SELECT v.phase,
           CASE
             when a.query ~ '^autovacuum.*to prevent wraparound' then 'wraparound'
             when a.query ~* '^vacuum' then 'user'
             when a.pid is null then null
             ELSE 'regular'
           END as mode,
           count(1) as workers_count
      FROM pg_stat_progress_vacuum v
      LEFT JOIN pg_stat_activity a using (pid)
     GROUP BY 1,2
  metrics:
    - phase:
        usage: LABEL
        description: Vacuum phase
    - mode:
        usage: LABEL
        description: Vacuum mode
    - workers_count:
        usage: GAUGE
        description: The number of active autovacuum workers in this state

#
# This query extracts marginalia metadata from pg_stat_activity and provides a sampled summary about the long running transactions.
#
pg_long_running_transactions_marginalia:
  query: >
    SELECT
      activity.matches[1] AS application,
      activity.matches[2] AS endpoint,
      MAX(age_in_seconds) AS max_age_in_seconds
    FROM (
      SELECT
        regexp_matches(query, '^\s*(?:\/\*(?:application:(\w+),?)?(?:correlation_id:\w+,?)?(?:jid:\w+,?)?(?:endpoint_id:([\w/\-\.:\#\s]+),?)?.*?\*\/)?\s*(\w+)') AS matches,
        EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) AS age_in_seconds
      FROM
        pg_stat_activity
      WHERE state <> 'idle'
        AND (clock_timestamp() - xact_start) > '30 seconds'::interval
        AND query NOT LIKE 'autovacuum:%'
      ) activity
    GROUP BY application, endpoint
    ORDER BY max_age_in_seconds DESC
  metrics:
    - application:
        usage: LABEL
        description: Name of the application as presented in marginalia comment
    - endpoint:
        usage: LABEL
        description: Name of the web endpoint or sidekiq job as presented in marginalia comment
    - max_age_in_seconds:
        usage: GAUGE
        description: The current maximum transaction age in seconds
